import random
import re
import time
import urllib.parse
import urllib.request as ur

import lxml.etree as le
import user_agent

# Interactive configuration: the search keyword and the inclusive range of
# result pages to crawl. The prompts are evaluated left to right, so the user
# is asked for the keyword, then the first page, then the last page.
keyword = input('请输入你的关键词：')
pn_start, pn_end = int(input('起始页：')), int(input('终止页：'))


def getRequest(url):
    """Build a ``urllib.request.Request`` for *url* with browser-like headers.

    The request carries a ``User-Agent`` obtained from
    ``user_agent.get_user_agent_pc()`` on every call and a fixed ``Cookie``
    header. Nothing is sent here; the caller passes the result to ``urlopen``.
    """
    # NOTE(review): this is a hard-coded session cookie (it contains UserName /
    # UserToken fields); consider loading it from configuration instead of
    # keeping it in source control.
    session_cookie = 'TY_SESSION_ID=2d261346-8d6e-433a-ae15-d45d0a49cef1; JSESSIONID=2D7DA02E5C730691244E3E90A64A383F; uuid_tt_dd=10_20709831710-1571148669170-291011; dc_session_id=10_1571148669170.114225; UserName=weixin_45609519; UserInfo=e80f774e3f514ebc80d1b6a417943611; UserToken=e80f774e3f514ebc80d1b6a417943611; UserNick=%E9%A3%8E%E5%BD%B1%E8%9D%89%E9%B8%A3; AU=A3D; UN=weixin_45609519; BT=1571148702104; p_uid=U000000; Hm_ct_6bcd52f51e9b3dce32bec4a3997715ac=6525*1*10_20709831710-1571148669170-291011!5744*1*weixin_45609519; __gads=Test; UM_distinctid=16e349660e19fe-0b9c5ae477be83-34564978-144000-16e349660e3a9e; firstDie=1; acw_tc=2760823e15742623614062837eb2a90225da7e7edf37324a9c91ec147893f6; __yadk_uid=jEkr01UHfVL2fg6b3FTLMmMtJrPg5920; Hm_lvt_6bcd52f51e9b3dce32bec4a3997715ac=1574277530,1574277560,1574277701,1574314399; announcement=%257B%2522isLogin%2522%253Atrue%252C%2522announcementUrl%2522%253A%2522https%253A%252F%252Fblogdev.blog.csdn.net%252Farticle%252Fdetails%252F103053996%2522%252C%2522announcementCount%2522%253A0%252C%2522announcementExpire%2522%253A3600000%257D; dc_tos=q1b0sl; Hm_lpvt_6bcd52f51e9b3dce32bec4a3997715ac=1574314438'
    request_headers = {}
    request_headers['User-Agent'] = user_agent.get_user_agent_pc()
    request_headers['Cookie'] = session_cookie
    return ur.Request(url, headers=request_headers)


# Crawl every search-result page in [pn_start, pn_end]; for each page, print
# the list of result links, then fetch every linked blog post and print the
# text of its <h1 class="title-article"> heading.
for pn in range(pn_start, pn_end + 1):
    # Random 0-3 s pause before each search request.
    time.sleep(random.random() * 3)
    # The keyword must be percent-encoded: a raw non-ASCII keyword cannot be
    # sent in the HTTP request line, and characters such as '&', '#' or ' '
    # would otherwise corrupt the query string.
    url = 'https://so.csdn.net/so/search/s.do?p=%s&q=%s&t=blog&viparticle=&domain=&o=&s=&u=&l=&f=&rbg=0' % (
        pn,
        urllib.parse.quote(keyword, safe=''),
    )
    # Use the response as a context manager so the connection is always closed.
    with ur.urlopen(getRequest(url)) as search_response:
        response = search_response.read()
    # hrefs of the <a> inside each span.link that sits next to a span "down fr".
    href_s = le.HTML(response).xpath('//span[@class="down fr"]/../span[@class="link"]/a/@href')
    time.sleep(random.random() * 3)
    print(href_s)
    for href in href_s:
        # Fetch the post through getRequest so the User-Agent and Cookie
        # headers are actually sent (previously a Request was built but the
        # bare href was opened, dropping the headers).
        with ur.urlopen(getRequest(href)) as blog_response:
            response_blog = blog_response.read()
        # xpath() returns a list of text nodes, not a single string.
        title = le.HTML(response_blog).xpath('//h1[@class="title-article"]/text()')
        print(title)
        # TODO: saving response_blog to 'blog/<title>.html' is not implemented;
        # `title` is a list and would have to be joined and sanitised before it
        # can be used as a file name.
